##### Preparation of background variables ####
# Step numbers refer to the document 'Data retrieval and preparation.docx'.


### Step 11 ###
# Read the SPSS file with the background variables into a data frame.
library(foreign)
data <- read.spss("Backgroundvariables.sav", to.data.frame = TRUE)

### Step 12 ###
#' Collapse a long-format dataset to one row per person.
#'
#' For every distinct person identifier (`nomem_encr`) and every column, the
#' first non-missing value found in that person's rows (in row order) is
#' kept. A column that is missing in all rows of a person stays `NA`.
#'
#' Columns are processed one at a time so that each keeps its own type
#' (numeric, factor, ...). Running `apply()` over the rows of a data frame
#' first converts it to a character matrix, which turns every value into
#' text and pads numbers with blanks.
#'
#' @param dataset A data frame with a `nomem_encr` column that identifies
#'   the person each row belongs to.
#' @return A data frame with the same columns as `dataset` and one row per
#'   distinct non-missing `nomem_encr`, in order of first appearance. Rows
#'   without an identifier are ignored; an input without rows gives a result
#'   without rows.
firstentry <- function(dataset) {
  stopifnot(is.data.frame(dataset), "nomem_encr" %in% names(dataset))

  person <- dataset[["nomem_encr"]]
  ids <- unique(person[!is.na(person)])
  # Position of each row's person in `ids`; NA for rows without identifier.
  group <- match(person, ids)

  first_known <- function(column) {
    # Rows that hold a usable value, in their original order.
    known <- which(!is.na(column) & !is.na(group))
    # match() reports the first hit, i.e. the earliest known row per person.
    # Persons without any known value get NA and therefore an NA entry.
    column[known[match(seq_along(ids), group[known])]]
  }

  # optional = TRUE keeps the column names exactly as they are in `dataset`.
  as.data.frame(
    lapply(dataset, first_known),
    optional = TRUE,
    stringsAsFactors = FALSE
  )
}

# One row per person, holding the first known value of every variable.
outcome <- firstentry(data)


# Recode the labelled background variables into numeric category codes.
# Each code is the position of the label in the list given for that
# variable; values that are not in the list, including missing values,
# become NA. All other columns are returned unchanged.
numdata <- function(dataset) {
  # Position of every value in `labels`, as a plain numeric vector.
  to_code <- function(values, labels) {
    as.numeric(factor(values, levels = labels))
  }

  geslacht_labels <- c("Male", "Female")

  sted_labels <- c(
    "Extremely urban",
    "Very urban",
    "Moderately urban",
    "Slightly urban",
    "Not urban"
  )

  oplcat_labels <- c(
    "primary school",
    "vmbo (intermediate secondary education, US: junior high school)",
    "havo/vwo (higher secondary education/preparatory university education, US: senior high school)",
    "mbo (intermediate vocational education, US: junior college)",
    "hbo (higher vocational education, US: college)",
    "wo (university)"
  )

  herkomstgroep_labels <- c(
    "Dutch background",
    "First generation foreign, Western background",
    "First generation foreign, non-western background",
    "Second generation foreign, Western background",
    "Second generation foreign, non-western background",
    "Origin unknown or part of the information unknown"
  )

  dataset$geslacht <- to_code(dataset$geslacht, geslacht_labels)
  dataset$sted <- to_code(dataset$sted, sted_labels)
  dataset$oplcat <- to_code(dataset$oplcat, oplcat_labels)
  dataset$herkomstgroep <- to_code(dataset$herkomstgroep, herkomstgroep_labels)

  dataset
}

# Numeric category codes instead of text labels.
newdata <- numdata(outcome)


### Step 13 ###
# Write the first-entry background variables to a text data file, together
# with an SPSS syntax file that reads that data file into SPSS.
write.foreign(
  df = newdata,
  datafile = "backgroundvariables_firstentry.txt",
  codefile = "backgroundvariables_firstentry.sps",
  package = "SPSS"
)


### Step 17 ###
# Strange case: a respondent who is recorded as born in 2006, probably a
# misunderstanding. Her third entry, 1995, is more likely to be true.
# %in% is used instead of == so that rows with a missing identifier are not
# returned as empty all-NA rows.
data[data$nomem_encr %in% 817694, ]

############# END OF FILE ############################